* begin automatic packages checker
* credit https://larsvilhuber.github.io/self-checking-reproducibility/20-reproducing-environments.html

* Set the root directory

global rootdir : pwd

* Define a location where we will hold all packages in THIS project (the "environment")

global adodir "$rootdir/ado"

* make sure it exists, if not create it.

cap mkdir "$adodir"

* Now let's simplify the adopath
* - remove the OLDPLACE and PERSONAL paths
* - NEVER REMOVE THE SYSTEM-WIDE PATHS - bad things will happen!

adopath - OLDPLACE
adopath - PERSONAL

* modify the PLUS path to point to our new location, and move it up in the order

sysdir set PLUS "$adodir"
adopath ++ PLUS

* verify the path

adopath

* end automatic packages checker

* begin install required packages
ssc install estout
ssc install ivreg2
ssc install ranktest
ssc install pdslasso
ssc install lassopack
ssc install winsor 
* end install required packages

timer clear 1
timer on 1

// preamble from Mayshar et al

use  "DATASET_ETHNOATLAS.dta", clear

datasignature
assert r(datasignature) == "1260:149(70423):3060543911:666054547"

keep if year~=. 
keep if year>=1500
keep if Barley_point_cal!=. /*Delete observations without agricultural data*/

**generate main regressors		

gen L_hier1=V33 if V33~=0           /*Jurisdictional Hierarchy Beyond Local Community: 0(missing) 1 (no political authority beyond community), 2 (one level) to 5 (for levels)*/

gen L_crop_cereals=1 if V29==6
replace L_crop_cereals=0 if L_crop_cereals==. & V29~=. & V29~=0 
gen cereals=L_crop_cereals

gen L_surplus=0 if TUDEN_MARSHALL_v93==1
replace L_surplus=1 if TUDEN_MARSHALL_v93<10 & TUDEN_MARSHALL_v93>1

gen Tax_burden=0 if SCSS_v1737==1 
replace Tax_burden=1 if SCSS_v1737==2 | SCSS_v1737==3  | SCSS_v1737==6
replace Tax_burden=2 if SCSS_v1737==4 | SCSS_v1737==5  | SCSS_v1737==7
label var Tax_burden "=0 no tax; =1 small tax; =2 regular and relevant tax"

gen depend_agric = (5-0)/2 if V5==0
replace depend_agric = 6+(15-6)/2   if V5==1
replace depend_agric = 16+(25-16)/2   if V5==2
replace depend_agric = 26+(35-26)/2   if V5==3
replace depend_agric = 36+(45-36)/2   if V5==4
replace depend_agric = 46+(55-46)/2   if V5==5
replace depend_agric = 56+(65-56)/2   if V5==6
replace depend_agric = 66+(75-66)/2   if V5==7
replace depend_agric = 76+(85-76)/2   if V5==8
replace depend_agric = 86+(100-86)/2   if V5==9
replace depend_agric = depend_agric/100

gen Cal_positive_plow1=max(Wheat_point_cal, Barley_point_cal ,Rye_point_cal)				
gen Cal_negative_plow1=max(Foxtail_Millet_point_cal , Pearl_Millet_point_cal  ,Sorghum_point_cal)	
gen Plow_advantage1 =Cal_positive_plow1-Cal_negative_plow1  

gen Animal_husbandry1b=1 if V4>0 & V4!=.
replace Animal_husbandry1b=0 if V4==0
tabulate Animal_husbandry1b, gen(dummy_Animal_husbandry1b_)

gen     Animal_cultivation2=(V39==2 | V39==3) if V39!=0
tabulate Animal_cultivation2, gen(dummy_Animal_cultivation2_)

gen Ruminants=1 if  V40==3 | V40==7
replace Ruminants=0 if Ruminants==.

gen max_cal_point_c1=max(Barley_point_cal, Buckwheat_point_cal , Dryland_Rice_point_cal ,Foxtail_Millet_point_cal, Maize_point_cal ,Oat_point_cal ,Pearl_Millet_point_cal, Rye_point_cal ,Sorghum_point_cal, Wetland_Rice_point_cal, Wheat_point_cal )
gen max_cal_point_t=max(Cassava_point_cal ,Sweet_Potato_point_cal, White_Potato_point_cal, Yams_point_cal)

label var max_cal_point_c1  "cal/ha cereal with MaxCalYield (on a circle(5cell radius)around Ethnoatlas point)(LowInput RainFed)"	
label var max_cal_point_t 	"cal/ha root/tub with MaxCalYield (on a circle(5cell radius)around Ethnoatlas point)(LowInput RainFed)"	

gen diff= max_cal_point_c1- max_cal_point_t
gen max = max(max_cal_point_c1,  max_cal_point_t)

egen std_max=std(max)
egen std_diff=std(diff)

gen sample_non_desertic=1 if  max_cal_point_t~=0 & max_cal_point_c1 ~=0 	/*non-desertic points; cereals and roots/tubers can be planted (Murdock points)*/

gen hierarchyA="1.tribe" if L_hier1==1
replace hierarchyA="2.small chiefd" if L_hier1==2
replace hierarchyA="3.large chiefd" if L_hier1==3
replace hierarchyA="4.small state" if L_hier1==4
replace hierarchyA="5.large state" if L_hier1==5

egen rainmean_std=std(rainmean) 
egen atemp_std=std(atemp) 
egen elevation_std=std(elevation) 
egen ruggedness_std=std(ruggedness) 
egen abslat_std=std(abslat) 
egen river_std=std(river) 
egen distcoast_std=std(distcoast) 
egen popd95_std=std(popd95) 
egen histpopd_std=std(histpopd)  
egen historicpopdB_std=std(historicpopdB)  
egen Plow_advantage1_std=std(Plow_advantage1) 
gen Plow_advantage1_std_7squared= Plow_advantage1_std*Plow_advantage1_std
egen ramank_std=std(ramank) 
egen suit_GalOz_std=std(suit_GalOz) 
egen irrigation_dep_std=std(irrigation_dep) 

tab dummy_continental3, gen(continent3_d)

tab dummy_continental_ref1, gen(continental_ref1_d) 

keep if sample_non_desertic==1

gen const=1

* labeling preamble

label var cereals "CerMain"
label var std_max "LandProd"
label var depend_agric "Dep. Agri."
label var std_diff "CerAdv"

*********************************************************************************************************************************
* we want to winsorize only on the N=952 that are included in the later estimations!
ivreg2  L_hier1  (cereals =  std_diff) std_max 		i.dummy_continental3, cluster(iso)
keep if e(sample)
*********************************************************************************************************************************

* IN TEXT CLAIM

* Outlier Detection
summarize std_diff, detail
count if std_diff < (r(p25)-(r(p75)-r(p25))*1.5)
display "`r(N)' of societies are left-outliers."

summarize std_diff, detail
count if std_diff > (r(p75)+(r(p75)-r(p25))*1.5) // 22 socieites are Outliers
display "`r(N)' of societies are right-outliers."

* IN TEXT CLAIM FOR granularity of winsorization, one by one winsorization
* where 20/952 is 0.021

eststo clear
forvalues i = 19(1)23{
	capture drop w_std_diff
	winsor std_diff, gen(w_std_diff) h(`i') highonly // notice the h here not p

local varcontrols "std_max continent3_d1 continent3_d2 continent3_d3 continent3_d4 continent3_d5 continent3_d6 rainmean_std atemp_std elevation_std ruggedness_std abslat_std latitude longitude river_std distcoast_std histpopd_std Plow_advantage1_std Plow_advantage1_std_7squared 	dummy_Animal_husbandry1b_1 dummy_Animal_husbandry1b_2 Ruminants dummy_Animal_cultivation2_2 irrigation_dep_std"

eststo: ivreg2  L_hier1  (cereals =  w_std_diff) std_max 		i.dummy_continental3, cluster(iso)	
	count if e(sample) & w_std_diff!=std_diff
	estadd scalar myratio = r(N)

	ivlasso L_hier1 (cereals=w_std_diff) (`varcontrols'),     sqrt pnotpen(w_std_diff) cluster(iso) idstats
	local selected_regressors= e(xselected)
	local selected_regressors2= "`selected_regressors'"
	macro list
	capture drop missing_var
	gen missing_var=0
	foreach vari of local  varcontrols {
	replace missing_var=1 if missing(`vari')==1
	}
}

esttab , ///
b(3) p(3) stats(N widstat myratio /*r2*/, fmt(%9.0g %9.2f %9.0g) labels("Observations" "F-Statistic" "Wins. Societies")) starl(* 0.10 ** 0.05 *** 0.01)  keep(cereals *std_max* ) ///
indicate("Continent FE = *continent*") ///
mgroups() ///
mtitles() label nonotes


*********************************************************************************************************************************
* FIGURE 1

_pctile std_diff, p(95 97 98 99) // the 95, 97, 98, 99 percentiles
return list

di `r(r1)'

local x1=-1*`r(r1)' // let's also store the negative of these cuts and display them too to show the missing on the other end
di `x1'
local x2=-1*`r(r2)'
local x3=-1*`r(r3)'
local x4=-1*`r(r4)'

hist std_diff if L_hier1!=., start(-2.5) width(0.25) xaxis(1 2) xlabel(-4(1)4, nogrid) ylabel(0(50)250, nogrid)  ///
xline(`x1' `x2' `x3' `x4', lcolor(gs12)) ///
xlabel(`x1' "5%" `x2' "3%"  `x3' "2%" `x4'  "1%" `r(r1)' "5%" `r(r2)' "3%" `r(r3)' "2%" `r(r4)' "1%", axis(2)) xtitle("", axis(2)) ///
xline(`r(r1)' `r(r2)' `r(r3)' `r(r4)' ) ///
 addlabel addlabopts(mlabposition(12)) freq 
graph export "fig_1.png", replace

*********************************************************************************************************************************

*********************************************************************************************************************************
* TABLE 1 

eststo clear

eststo: ivreg2  L_hier1  (cereals =  std_diff) std_max i.dummy_continental3  		,cluster(iso)		 	

eststo: ivreg2  L_hier1  (cereals =  std_diff) 	std_max depend_agric i.dummy_continental3  if sample_non_desertic==1,cluster(iso)		 	

forval i = 1(1)4{
	capture drop indicator
	gen indicator = .
	replace indicator = 0 if L_hier1<=`i' & L_hier1!=.
	replace indicator = 1 if L_hier1>`i' & L_hier1!=.
	
	eststo: ivreg2  indicator  (cereals =  std_diff) 	std_max depend_agric i.dummy_continental3 if sample_non_desertic==1,cluster(iso)		 	

}

esttab , ///
b(3) se(3) label stats(N widstat, label("Obs." "F-statistic") fmt(%11.0gc %11.2f)) star(* 0.10 ** 0.05 *** 0.01) keep() nocons nonotes  ///
mgroups("MMP" "5-Scale" "Ind. for above level" "Ind. for one above level",pattern(1 1 1 0 0 0 1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span ) ///
mtitles("MMP" "5-Scale" "Tribe" "S. Chf." "L. Chf" "S. State" "Tribe" "S. Chf." "L. Chf" "S. State") ///
indicate( ///
"Cont. FE = *dummy_continental3* " ///
, labels("Y" " "))

esttab using "table_1.tex", replace ///
b(3) se(3) label stats(N widstat, label("Observations" "F-statistic") fmt(%11.0gc %11.2f)) star(* 0.10 ** 0.05 *** 0.01) keep() nocons nonotes  ///
mgroups("MMP's Continuous" "Indicator for above...",pattern(1 0 1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span ) ///
mtitles(" " " " "Tribe" "S. Chf." "L. Chf" "S. State" "Tribe" "S. Chf." "L. Chf" "S. State") ///
indicate( ///
"Cont. FE = *dummy_continental3* " ///
, labels("Y" " "))
*********************************************************************************************************************************

*********************************************************************************************************************************
* TABLE 2

eststo clear

local varcontrols "std_max continent3_d1 continent3_d2 continent3_d3 continent3_d4 continent3_d5 continent3_d6 rainmean_std atemp_std elevation_std ruggedness_std abslat_std latitude longitude river_std distcoast_std histpopd_std Plow_advantage1_std Plow_advantage1_std_7squared 	dummy_Animal_husbandry1b_1 dummy_Animal_husbandry1b_2 Ruminants dummy_Animal_cultivation2_2 irrigation_dep_std"

// no Winsor

capture drop w_std_diff
gen w_std_diff = std_diff

eststo: ivreg2  L_hier1  (cereals =  std_diff) std_max 		i.dummy_continental3, cluster(iso)
	count if e(sample) & w_std_diff!=std_diff
	estadd scalar myratio = r(N)

	ivlasso L_hier1 (cereals=w_std_diff) (`varcontrols'),     sqrt pnotpen(w_std_diff) cluster(iso) idstats
	local selected_regressors= e(xselected)
	local selected_regressors2= "`selected_regressors'"
	macro list
	capture drop missing_var
	gen missing_var=0
	foreach vari of local  varcontrols {
	replace missing_var=1 if missing(`vari')==1
	}
eststo: ivreg2  L_hier1  (cereals =  std_diff ) 	`selected_regressors2' if  missing_var==0, cluster(iso) ffirst

// Loop Over the Winsorizations
foreach i in 0.01 0.02 0.03 0.05 {
	di `i'
	
	capture drop w_std_diff
	winsor std_diff, gen(w_std_diff) p(`i') highonly

eststo: ivreg2  L_hier1  (cereals =  w_std_diff) std_max 		i.dummy_continental3, cluster(iso)	
	count if e(sample) & w_std_diff!=std_diff
	estadd scalar myratio = r(N)

	ivlasso L_hier1 (cereals=w_std_diff) (`varcontrols'),     sqrt pnotpen(w_std_diff) cluster(iso) idstats
	local selected_regressors= e(xselected)
	local selected_regressors2= "`selected_regressors'"
	macro list
	capture drop missing_var
	gen missing_var=0
	foreach vari of local  varcontrols {
	replace missing_var=1 if missing(`vari')==1
	}
eststo: ivreg2  L_hier1  (cereals =  w_std_diff ) 	`selected_regressors2' if  missing_var==0, cluster(iso) ffirst
	
	
}

esttab , ///
b(3) p(3) stats(N widstat myratio /*r2*/, fmt(%9.0g %9.2f %9.0g) labels("Observations" "F-Statistic" "Wins. Societies")) starl(* 0.10 ** 0.05 *** 0.01)  keep(cereals *std_max* ) ///
indicate("Continent FE = *continent*") ///
mgroups("No Winsor" "Top 1\%" "Top 2\%" "Top 3\%" "Top 5\%", pattern(1 0 1 0 1 0 1 0 1 0)  prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat()) ///
mtitles("2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS") label nonotes

esttab using "table_2.tex" , replace ///
b(3) p(3) stats(N widstat myratio /*r2*/, fmt(%9.0g %9.2f %9.0g) labels("Observations" "F-Statistic" "Wins. Societies")) starl(* 0.10 ** 0.05 *** 0.01)  keep(cereals std_max ) ///
indicate("Continent FE = *continent*") ///
mgroups("No Winsor" "Top 1\%" "Top 2\%" "Top 3\%" "Top 5\%", pattern(1 0 1 0 1 0 1 0 1 0)  prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat()) ///
mtitles("2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS") label nonotes

*********************************************************************************************************************************

*********************************************************************************************************************************
* TABLE A1

eststo clear

local varcontrols "std_max continent3_d1 continent3_d2 continent3_d3 continent3_d4 continent3_d5 continent3_d6 rainmean_std atemp_std elevation_std ruggedness_std abslat_std latitude longitude river_std distcoast_std histpopd_std Plow_advantage1_std Plow_advantage1_std_7squared 	dummy_Animal_husbandry1b_1 dummy_Animal_husbandry1b_2 Ruminants dummy_Animal_cultivation2_2 irrigation_dep_std"

// no Winsor

capture drop w_std_diff
gen w_std_diff = std_diff

eststo: ivreg2  L_hier1  (cereals =  std_diff) std_max 		i.dummy_continental3, cluster(iso)
	count if e(sample) & w_std_diff!=std_diff
	estadd scalar myratio = r(N)

	ivlasso L_hier1 (cereals=w_std_diff) (`varcontrols'),     sqrt pnotpen(w_std_diff) cluster(iso) idstats
	local selected_regressors= e(xselected)
	local selected_regressors2= "`selected_regressors'"
	macro list
	capture drop missing_var
	gen missing_var=0
	foreach vari of local  varcontrols {
	replace missing_var=1 if missing(`vari')==1
	}
eststo: ivreg2  L_hier1  (cereals =  std_diff ) 	`selected_regressors2' if  missing_var==0, cluster(iso) ffirst

// Loop Over the Winsorizations
foreach i in 0.01 0.02 0.03 0.05 {
	di `i'
	
	capture drop w_std_diff
	winsor std_diff, gen(w_std_diff) p(`i') highonly
	replace w_std_diff=. if w_std_diff!=std_diff

eststo: ivreg2  L_hier1  (cereals =  w_std_diff) std_max 		i.dummy_continental3, cluster(iso)	
	count if e(sample) & w_std_diff!=std_diff
	estadd scalar myratio = r(N)

	ivlasso L_hier1 (cereals=w_std_diff) (`varcontrols'),     sqrt pnotpen(w_std_diff) cluster(iso) idstats
	local selected_regressors= e(xselected)
	local selected_regressors2= "`selected_regressors'"
	macro list
	capture drop missing_var
	gen missing_var=0
	foreach vari of local  varcontrols {
	replace missing_var=1 if missing(`vari')==1
	}
eststo: ivreg2  L_hier1  (cereals =  w_std_diff ) 	`selected_regressors2' if  missing_var==0, cluster(iso) ffirst

}

esttab , ///
b(3) p(3) stats(N widstat  /*r2*/, fmt(%9.0g %9.2f %9.0g) labels("Observations" "F-Statistic" )) starl(* 0.10 ** 0.05 *** 0.01)  keep(cereals *std_max* ) ///
indicate("Continent FE = *continent*") ///
mgroups("No Trimming" "Top 1\%" "Top 2\%" "Top 3\%" "Top 5\%", pattern(1 0 1 0 1 0 1 0 1 0)  prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat()) ///
mtitles("2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS") label nonotes

esttab using "table_a1.tex" , replace ///
b(3) p(3) stats(N widstat  /*r2*/, fmt(%9.0g %9.2f %9.0g) labels("Observations" "F-Statistic" )) starl(* 0.10 ** 0.05 *** 0.01)  keep(cereals std_max ) ///
indicate("Continent FE = *continent*") ///
mgroups("No Trimming" "Top 1\%" "Top 2\%" "Top 3\%" "Top 5\%", pattern(1 0 1 0 1 0 1 0 1 0)  prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat()) ///
mtitles("2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS" "2SLS" "2SLSPDS") label nonotes

*********************************************************************************************************************************

timer off 1

timer list 1
